1 Load data

2 Plot QC vars

Number of features and counts and percentage of mitochondrial and ribosomal RNA are plotted below. Red lines mark limits for filtering.

alldata <- PercentageFeatureSet(alldata, "^mt-", col.name = "percent_mito")

alldata <- PercentageFeatureSet(alldata, "^Rp[sl]", col.name = "percent_ribo")

alldata <- PercentageFeatureSet(alldata, "^Hb.-", col.name = "percent_hb")


mito_thresh = 10
ribo_thresh = 5
feat_thresh = 400
hb_thresh = 25


feats <- c("nFeature_RNA", "nCount_RNA", "percent_mito", "percent_ribo", "percent_hb")
glist = VlnPlot(alldata, group.by = "orig.ident", features = feats,  combine = FALSE, pt.size = 0)
names(glist) = feats
glist = lapply(glist, function(x){x + NoLegend() + 
    geom_point(position = position_jitter(), alpha = 0.1, size = 0.1)})


cowplot::plot_grid(align = "v",plotlist = list(glist[["nFeature_RNA"]] + geom_hline(yintercept = feat_thresh, color = "red"),
glist[["nCount_RNA"]],
glist[["percent_mito"]] + geom_hline(yintercept = mito_thresh, color = "red") ,
glist[["percent_ribo"]] + geom_hline(yintercept = ribo_thresh, color = "red")),
glist[["percent_hb"]] + geom_hline(yintercept = hb_thresh, color = "red"))

FeatureScatter(alldata, "nCount_RNA", "nFeature_RNA", group.by = "orig.ident", pt.size = 0.5,
                shuffle = TRUE) + geom_hline(yintercept = feat_thresh)

FeatureScatter(alldata, "percent_ribo", "percent_mito", group.by = "orig.ident", pt.size = 0.5,
                shuffle = TRUE) + geom_hline(yintercept = mito_thresh) + 
  geom_hline(yintercept = mito_thresh) +
  geom_vline(xintercept = ribo_thresh)

FeatureScatter(subset(alldata, idents = c("GW_EPI_d0", "STD_EPI_d0")),
               "percent_ribo", "percent_mito", group.by = "orig.ident",
               pt.size = 0.5, shuffle = TRUE, raster = TRUE) + 
  geom_hline(yintercept = mito_thresh) +
  geom_vline(xintercept = ribo_thresh)

3 Filter cells and genes

The same QC as above are plotted below, only including cells and genes that pass filtering limits (gene limit: > 3 copies expressed). The top expressed genes are also shown. Mitochondrial and ribosomal genes are filtered out of the data.

selected_c <- WhichCells(alldata, expression = nFeature_RNA > feat_thresh)
selected_f <- rownames(alldata)[Matrix::rowSums(alldata) > 3]

data.filt <- subset(alldata, features = selected_f, cells = selected_c)

selected_mito <- WhichCells(data.filt, expression = percent_mito < 10)
selected_ribo <- WhichCells(data.filt, expression = percent_ribo > 5)
selected_hb <- WhichCells(data.filt, expression = percent_hb < 25)

# and subset the object to only keep those cells
data.filt <- subset(data.filt, cells = intersect(selected_hb,intersect(selected_mito, selected_ribo)))

feats <- c("nFeature_RNA", "nCount_RNA", "percent_mito", "percent_ribo")
VlnPlot(data.filt, group.by = "orig.ident", features = feats, pt.size = 0.1, ncol = 2) + 
    NoLegend()

VlnPlot(data.filt, group.by = "orig.ident", features = feats, pt.size = 0, ncol = 2) + 
    NoLegend()

par(mar = c(4, 8, 2, 1))
C <- data.filt@assays$RNA@counts
C <- Matrix::t(Matrix::t(C)/Matrix::colSums(C)) * 100

most_expressed <- order(apply(C, 1, median), decreasing = T)[20:1]

# potentials = list()
# for(i in seq(1,dim(C)[2],20000)){
#   potentials[[length(potentials)+1]] = rownames(C)[order(apply(C[,i:min(i+19999, dim(C)[2])], 1, median), decreasing = T)[1000:1]]
# }
# length(unique(unlist(potentials)))
# 
# 
# most_expressed <- order(apply(C[unique(unlist(potentials)),], 1, median), decreasing = T)[20:1]


boxplot(as.matrix(t(C[most_expressed, ])), cex = 0.1, las = 1, xlab = "% total count per cell",
    col = (scales::hue_pal())(20)[20:1], horizontal = TRUE)

# Filter Malat1, Gm42418
data.filt <- data.filt[!grepl("Malat1", rownames(data.filt)), ]
data.filt <- data.filt[!grepl("Gm42418", rownames(data.filt)), ]

# Filter Mitocondrial
data.filt <- data.filt[!grepl("^mt-", rownames(data.filt)), ]

# Filter Ribossomal gene (optional if that is a problem on your data) data.filt
data.filt <- data.filt[ ! grepl('^Rp[sl]', rownames(data.filt)), ]

# Filter Hemoglobin gene (optional if that is a problem on your data)
data.filt <- data.filt[!grepl("^Hb.-", rownames(data.filt)), ]

# saveRDS(data.filt, "../analysis/data_pre_cellcycle.rds")

4 Analyze cell cycle

The S-phase and G2M-phase scores are calculated based on the cell cycle gene lists defined in the Seurat package (translated to mouse genes using biomaRt).

# Before running CellCycleScoring the data need to be normalized and
# logtransformed.

data.filt = NormalizeData(data.filt)

data.filt <- CellCycleScoring(object = data.filt, g2m.features = g2mFeat, 
    s.features = sFeat)

VlnPlot(data.filt, features = c("S.Score", "G2M.Score"), group.by = "orig.ident", 
    ncol = 3, pt.size = 0.1)

5 Find Doublets

Doublets are identified and excluded using the DoubletFinder package.

if(file.exists(( "../analysis/filtered_data_presweep.rds"))){
  data.filt = readRDS( "../analysis/filtered_data_presweep.rds")
}else{
  data.filt = FindVariableFeatures(data.filt, verbose = F)
  data.filt = ScaleData(data.filt, vars.to.regress = c("nFeature_RNA", "percent_mito"), 
      verbose = F)
  data.filt = RunPCA(data.filt, verbose = F, npcs = 20)
  data.filt = RunUMAP(data.filt, dims = 1:10, verbose = F)
  
  saveRDS(data.filt, "../analysis/filtered_data_presweep.rds")
}




if(file.exists("../analysis/sweeplist.RData")){
  load("../analysis/sweeplist.RData")
}else{
  sweeplist=list()
  for(i in unique(data.filt$orig.ident)){
    data.filt.subset=subset(data.filt, idents = i)
    sweep.res <- paramSweep_v3(data.filt.subset)#, num.cores = 3)
    sweep.stats <- summarizeSweep(sweep.res, GT = FALSE)
    bcmvn <- find.pK(sweep.stats)
    
    sweeplist[[i]] = bcmvn
    
  }
  
  save(sweeplist, file = "../analysis/sweeplist.RData")
}


if(file.exists("../analysis/singletList.RData")){
  load("../analysis/singletList.RData")
}else{
  singletList = list()
  
  
  for(i in unique(data.filt$orig.ident)){
    data.filt.subset=subset(data.filt, idents = i)
    nExp <- round(ncol(data.filt.subset) * 0.08)  # expect 8% doublets
    mypK = as.numeric(as.character(sweeplist[[i]]$pK[which.max(sweeplist[[i]]$BCmetric)]))
    data.filt.subset <- doubletFinder_v3(data.filt.subset, pN = 0.25, pK = mypK, nExp = nExp, PCs = 1:10)
    DF.name = colnames(data.filt.subset@meta.data)[grepl("DF.classification", colnames(data.filt.subset@meta.data))]
    singletList[[i]] = data.filt.subset@meta.data[, DF.name]
    names(singletList[[i]]) = colnames(data.filt.subset)
  }
  
  save(singletList, file = "../analysis/singletList.RData")
}


# sweep.res <- paramSweep_v3(data.filt)#, num.cores = 3)
# sweep.stats <- summarizeSweep(sweep.res, GT = FALSE)
# bcmvn <- find.pK(sweep.stats)
# par(mar = c(4, 4, 2, 2))
# barplot(bcmvn$BCmetric, names.arg =bcmvn$pK, las=2)





#data.filt@meta.data$DF = unlist(singletList)[match(colnames(data.filt), gsub(".*\\.","",names(unlist(singletList))))]
data.filt = AddMetaData(data.filt, unlist(singletList)[match(colnames(data.filt), gsub(".*\\.","",names(unlist(singletList))))], "DF")

cowplot::plot_grid(ncol = 2, DimPlot(data.filt, group.by = "orig.ident") + NoAxes(), 
    DimPlot(data.filt, group.by = "DF") + NoAxes())

DimPlot(data.filt, group.by = "DF", split.by = "orig.ident", ncol = 4) + NoAxes()

VlnPlot(data.filt, features = "nFeature_RNA", group.by = "DF",pt.size = 0.1)

VlnPlot(data.filt, features = "nFeature_RNA", split.by = "DF", group.by = "orig.ident", pt.size = 0.1)

data.filt = data.filt[, data.filt@meta.data[, "DF"] == "Singlet"]



cellfreq = as.data.frame(table(data.filt$orig.ident), stringsAsFactors = FALSE)
cellfreq$Day = sapply(cellfreq$Var1, function(x){strsplit(x, "_")[[1]][3]})
cellfreq$Type = sapply(cellfreq$Var1, function(x){strsplit(x, "_")[[1]][2]})
cellfreq$Treatment = sapply(cellfreq$Var1, function(x){strsplit(x, "_")[[1]][1]})
#png("/Volumes/tank/Private/Meetings/Lab meetings/MyLabMeetings/20210621/FilteredCellNumbers.png", res = 240, width = 1200, height = 800)
ggplot(cellfreq, aes(x = paste(Day, Treatment), y = Freq, color = Type)) + geom_point() + 
  theme(panel.background = element_blank(), legend.key = element_blank(),
        axis.text.x = element_text(angle = 45, hjust = 1)) + 
  labs(x = "Day and treatment", y = "Number of cells") +
  ylim(0,NA)

#dev.off()

Final cell counts:

Var1 Freq
GW_DN_d0 13825
GW_EPI_d0 2927
GW_IMM_d0 15715
STD_DN_d0 14575
STD_EPI_d0 5085
STD_IMM_d0 15762
saveRDS(data.filt, file = "../analysis/filtered_data.rds")


sessionInfo()
## R version 4.0.5 (2021-03-31)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Catalina 10.15.7
## 
## Matrix products: default
## BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
## LAPACK: /Users/jenfra/miniconda3/envs/singlecell/lib/libopenblasp-r0.3.15.dylib
## 
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] DoubletFinder_2.0.3 Matrix_1.4-1        gplots_3.1.1        rafalib_1.0.0       enrichR_3.0         pheatmap_1.0.12     ggplot2_3.3.6      
##  [8] cowplot_1.1.1       dplyr_1.0.9         venn_1.11           sp_1.5-0            SeuratObject_4.1.0  Seurat_4.1.1       
## 
## loaded via a namespace (and not attached):
##   [1] plyr_1.8.7            igraph_1.3.4          lazyeval_0.2.2        splines_4.0.5         listenv_0.8.0         scattermore_0.8       digest_0.6.29        
##   [8] htmltools_0.5.3       rsconnect_0.8.25      fansi_1.0.3           magrittr_2.0.3        tensor_1.5            cluster_2.1.2         ROCR_1.0-11          
##  [15] limma_3.46.0          globals_0.16.0        matrixStats_0.62.0    spatstat.sparse_2.1-1 colorspace_2.0-3      ggrepel_0.9.1         xfun_0.31            
##  [22] crayon_1.5.1          jsonlite_1.8.0        progressr_0.10.0      spatstat.data_2.2-0   survival_3.3-1        zoo_1.8-9             glue_1.6.2           
##  [29] polyclip_1.10-0       gtable_0.3.0          leiden_0.3.9          future.apply_1.8.1    abind_1.4-5           scales_1.1.1          DBI_1.1.3            
##  [36] spatstat.random_2.2-0 miniUI_0.1.1.1        Rcpp_1.0.9            viridisLite_0.4.0     xtable_1.8-4          reticulate_1.25       spatstat.core_2.4-4  
##  [43] bit_4.0.4             htmlwidgets_1.5.4     httr_1.4.2            RColorBrewer_1.1-2    ellipsis_0.3.2        ica_1.0-2             pkgconfig_2.0.3      
##  [50] farver_2.1.0          sass_0.4.0            uwot_0.1.11           deldir_1.0-6          utf8_1.2.2            tidyselect_1.1.2      labeling_0.4.2       
##  [57] rlang_1.0.4           reshape2_1.4.4        later_1.3.0           munsell_0.5.0         tools_4.0.5           cachem_1.0.6          cli_3.3.0            
##  [64] generics_0.1.3        ggridges_0.5.3        evaluate_0.15         stringr_1.4.0         fastmap_1.1.0         yaml_2.3.5            goftest_1.2-3        
##  [71] knitr_1.37            bit64_4.0.5           fitdistrplus_1.1-8    caTools_1.18.2        admisc_0.29           purrr_0.3.4           RANN_2.6.1           
##  [78] pbapply_1.5-0         future_1.27.0         nlme_3.1-155          mime_0.12             hdf5r_1.3.5           compiler_4.0.5        rstudioapi_0.13      
##  [85] plotly_4.10.0         curl_4.3.2            png_0.1-7             spatstat.utils_2.3-1  tibble_3.1.8          bslib_0.4.0           stringi_1.7.8        
##  [92] highr_0.9             rgeos_0.5-9           lattice_0.20-45       vctrs_0.4.1           pillar_1.8.0          lifecycle_1.0.1       spatstat.geom_2.4-0  
##  [99] lmtest_0.9-40         jquerylib_0.1.4       RcppAnnoy_0.0.19      data.table_1.14.2     bitops_1.0-7          irlba_2.3.5           httpuv_1.6.5         
## [106] patchwork_1.1.1       R6_2.5.1              bookdown_0.24         promises_1.2.0.1      KernSmooth_2.23-20    gridExtra_2.3         parallelly_1.32.1    
## [113] codetools_0.2-18      MASS_7.3-55           gtools_3.9.2          assertthat_0.2.1      rjson_0.2.21          withr_2.5.0           sctransform_0.3.3    
## [120] mgcv_1.8-39           parallel_4.0.5        grid_4.0.5            rpart_4.1.16          tidyr_1.2.0           rmarkdown_2.13        Rtsne_0.15           
## [127] shiny_1.7.2